Part 1: AIR DOSE UNDERSTANDING

Part I: Data Loading, Cleaning and Exploring

The Air Dose Rate measurements were collected from three sources:

  1. Cars and Buses with Sensors: Measure Air of 100m&#0178 from the road
  2. Un manned Auto Helicopter: Measure from 300m above ground
  3. Fixed Sensors: Geographically sparsed

1.11 Required Packages

library(leaflet)
library(readr)
library(dplyr)
library(RColorBrewer)
library(Hmisc)
library(ggplot2)
library(formatR)
1.11 Loading June 2011 and Nov 2013 Fukushima Data and selecting Fukushima’s.
fuk2011 <- read.csv("jun_2011_fukushima.csv")
fuk2013 <- read.csv("nov_2013_fukushima.csv")
1.12 Change to machine readeable column names
names(fuk2011) <- c("gridcode", "pref", "city", "gridCenterNorthlat", 
    "gridCenterEastlng", "gridCenterNorthlatDec", "gridCenterEastlngDec", 
    "daichi_distance", "no_samples", "AvgAirDoseRate", "NE_nLat", 
    "NE_eLong", "NW_nLat", "NW_eLong", "SW_nLat", "SW_eLong", 
    "SE_nLat", "SE_eLong")
names(fuk2013) <- c("gridcode", "pref", "city", "gridCenterNorthlat", 
    "gridCenterEastlng", "gridCenterNorthlatDec", "gridCenterEastlngDec", 
    "daichi_distance", "no_samples", "AvgAirDoseRate", "NE_nLat", 
    "NE_eLong", "NW_nLat", "NW_eLong", "SW_nLat", "SW_eLong", 
    "SE_nLat", "SE_eLong")
dim(fuk2013)
## [1] 119522     18
dim(fuk2011)
## [1] 45273    18
max(fuk2013$AvgAirDoseRate)
## [1] 39
min(fuk2013$AvgAirDoseRate)
## [1] 0.009
max(fuk2011$AvgAirDoseRate)
## [1] 37
min(fuk2011$AvgAirDoseRate)
## [1] 0.008
1.14 Create air dose quantiles that are plot-able,i.e 6 categorical variables.
fuk2011_q <- fuk2011 %>% mutate(dose_quants = cut2(fuk2011$AvgAirDoseRate, 
    cuts = c(0.1, 1, 5, 15, 25, 35, 45), levels.mean = TRUE))
fuk2013_q <- fuk2013 %>% mutate(dose_quants = cut2(fuk2013$AvgAirDoseRate, 
    cuts = c(0.1, 1, 5, 15, 25, 35, 45), levels.mean = TRUE))

# write.csv(fuk2011_q, file='fuk2011dose.csv',row.names =
# FALSE) write.csv(fuk2013_q,
# file='fuk2013dose.csv',row.names = FALSE)

fuk2013dose <- read.csv("fuk2013dose.csv")
# remove the slash on grides
fuk2013dose$gridcode <- gsub("_", "", fuk2013dose$gridcode)
# remove the last character on grides, 250m to 500m
fuk2013dose$gridcode <- gsub(".{1}$", "", fuk2013dose$gridcode)
  • Visible reduction of Average Air Dose Distribution by half in Fukushima.
  • Trouble is knowing the distribution of causes of this reduction?
1.15 Color function
iro <- colorFactor(palette = "YlOrRd", domain = fuk2011_q$dose_quants)
iro2013 <- colorFactor(palette = "YlOrRd", domain = fuk2013_q$dose_quants)
# Link of Daichi
fukulink <- paste(sep = "<br/>", "<br><a href='http://www.tepco.co.jp
                  /en/decommision/index-e.html'>Fukushima Daichi</a></b>", 
    "Source of radiations")
1.16 Fukushima Average Air Dose Rate for 2011
fuk2011_plot <- leaflet() %>% addTiles() %>% addRectangles(data = fuk2011_q, 
    lng1 = ~SW_eLong, lat1 = ~SW_nLat, lng2 = ~NE_eLong, lat2 = ~NE_nLat, 
    color = ~iro(fuk2011_q$dose_quants)) %>% addLegend("bottomright", 
    pal = iro, values = fuk2011_q$dose_quants, title = "AvgAirDoseRate", 
    labFormat = labelFormat(prefix = "µSv/h "), opacity = 1) %>% 
    addPopups(lat = 37.4211, lng = 141.0328, popup = fukulink, 
        options = popupOptions(closeButton = TRUE))
fuk2011_plot


1.17 Fukushima Average Air Dose Rate for 2013
fuk2013_plot <- leaflet() %>%
        addTiles()%>%
        addRectangles(data = fuk2013_q,lng1 = ~SW_eLong, lat1 = ~SW_nLat,
                      lng2 = ~NE_eLong, lat2 = ~NE_nLat,
                      color = ~iro2013(fuk2013_q$dose_quants))%>%
        addLegend("bottomright", pal = iro2013, values = fuk2013_q$dose_quants,
                  title = "AvgAirDoseRate",
                  labFormat = labelFormat(prefix = "µSv/h "),
                  opacity = 1)%>%
        addPopups(lat = 37.4211, lng = 141.0328,popup = fukulink,
                  options = popupOptions(closeButton = TRUE)) 
fuk2013_plot


1.18 Fukushima 2011, Counts of Measuring Locations per Air Dose Rate
ggplot(fuk2011_q, aes(daichi_distance,AvgAirDoseRate)) +
        geom_point() +
        geom_smooth(se = FALSE)+
        ggtitle("AvgAirDose against Distance to Daichi Plant")

1.19 Fukushima 2011,Counts of Measuring Locations per Air Dose Rate
ggplot(data = fuk2013_q) +
        geom_bar(mapping = aes(x = daichi_distance, fill = dose_quants), width = 1)+
        ggtitle("AvgAirDose Measured Counts against Daichi Distance")

2.10 Air Dose Rate Reduction due to Distance to Fukushima Daichi

\[AvgAirDoseRate = \beta_{0} + \beta_{1} daichi distance + \epsilon_{i}\]

fit1 <- lm(AvgAirDoseRate~daichi_distance, data = fuk2011)
summary(fit1)
## 
## Call:
## lm(formula = AvgAirDoseRate ~ daichi_distance, data = fuk2011)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.226 -0.901 -0.321  0.165 34.291 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2.7838704  0.0267272  104.16   <2e-16 ***
## daichi_distance -0.0288916  0.0004048  -71.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.278 on 45271 degrees of freedom
## Multiple R-squared:  0.1012, Adjusted R-squared:  0.1011 
## F-statistic:  5095 on 1 and 45271 DF,  p-value: < 2.2e-16
#Confidence interval
confint(fit1)
##                       2.5 %      97.5 %
## (Intercept)      2.73148462  2.83625613
## daichi_distance -0.02968495 -0.02809831

2.20 Air Dose Rate Reduction due to Population Density and Land Use

\[AvgAirDoseRate = \beta_{0} + \beta_{1} popn density + \beta_{2} human activities + \epsilon_{i}\]

Part 2: POPULATION UNDERSTANDING

Population density and land usage is measured on a 500m² basis

fuk_pop <- read.csv("fuk.csv")
# Convert mesh gridecode to lat/long
mesh_latlong <- function (mesh, loc = "C")
{
    mesh <- as.character(mesh)

    if (length(grep("^[0-9]{4}", mesh)) == 1) { 
        mesh12 <- as.numeric(substring(mesh, 1, 2))
        mesh34 <- as.numeric(substring(mesh, 3, 4))
        lat_width  <- 2 / 3;
        long_width <- 1;
    }
    else {
        return(NULL)
    }

    if (length(grep("^[0-9]{6}", mesh)) == 1) { 
        mesh5 <- as.numeric(substring(mesh, 5, 5))
        mesh6 <- as.numeric(substring(mesh, 6, 6))
        lat_width  <- lat_width / 8;
        long_width <- long_width / 8;
    }

    if (length(grep("^[0-9]{8}", mesh)) == 1) { 
        mesh7 <- as.numeric(substring(mesh, 7, 7))
        mesh8 <- as.numeric(substring(mesh, 8, 8))
        lat_width  <- lat_width / 10;
        long_width <- long_width / 10;
    }

    lat  <- mesh12 * 2 / 3;          
    long <- mesh34 + 100;

    if (exists("mesh5") && exists("mesh6")) {  
        lat  <- lat  + (mesh5 * 2 / 3) / 8;
        long <- long +  mesh6 / 8;
    }
    if (exists("mesh7") && exists("mesh8")) {  
        lat  <- lat  + (mesh7 * 2 / 3) / 8 / 10;
        long <- long +  mesh8 / 8 / 10;
    }

    if (loc == "C") {   
        lat  <-  lat  + lat_width  / 2;
        long <-  long + long_width / 2;
    }
    if (length(grep("N", loc)) == 1) {  
        lat  <- lat  + lat_width;
    }
    if (length(grep("E", loc) == 1)) {  
        long <- long +long_width;
    }

    lat  <- sprintf("%.8f", lat); 
    long <- sprintf("%.8f", long);

    x <- list(as.numeric(lat), as.numeric(long))
    names(x) <- c("lat", "long")

    return (x)
}
# j <- mesh_latlong(mesh = 564000463 , loc = "C")
# class(j);print(j);length(j)
grides <- fuk_pop[,1]
head(grides);class(grides);length(grides)
## [1] 564000194 564000364 564000382 564000393 564000462 564000463
## [1] "integer"
## [1] 10831
#create the lat/long
mylist <- list()
for (i in 1:length(grides) ){
        lis <- mesh_latlong(mesh = grides[i], loc = "C")
        mylist[[i]] <- lis
}

res <- as.data.frame(mylist)
# View(res)
#test
# df <- data.frame(matrix(unlist(res), nrow=10831, byrow=T))
# df <- ldply (res, data.frame)

#library(data.table)
# df <- as.data.frame(rbindlist(mylist, fill=TRUE))
# df[,"gridcode"] <- grides 
# View(df)
# merge gridcode and lat/lon datasets of Fuk population
# fuk_ll <- merge(fuk_pop, df, by.x = "gridcode", by.y = "gridcode", all = TRUE)
# write.csv(fuk_pop, file="fuk_pop.csv",row.names = FALSE)
#load new dataset
#fuk_pop <- read.csv("fuk_pop.csv")
# no_unique <- length(unique(fuk_pop$gridcode))

Part 3: LAND USE UNDERSTANDING

http://nlftp.mlit.go.jp/ksj-e/old/cgi-bin/_download_view.cgi Forest and Public Lands mesh Land Classification Mesh

Agricultural Areas (surface):A12-60A-48-01.1.zip 10.8MB Agricultural Areas (surface):A12-60A-48-01.0a.zip 10.82MB

What constitutes Land Use?:

  1. Farming: Short life span crops lead to frequent land tilling
  2. Cleaned Places: Parks, GolfCourses, Gardens are cleaned too and could reduce radiations

Part 4: USING MACHINE LEARNING MACHINE ALGORITHMS TO DISCOVER RELATIONSHIP AMONG POPULATION, AIR DOSE AND LAND USE

The Fukushima Nuclear Radiations is multi dimensional;

  • Three major Isotopes each with;
  • Time series due to half life
  • Magnitude of radiation (Becquerel)
  • Absorbed dose (Sievert (Sv))
  • The above dimensionalities coupled with distance,population density and land use, create a data set that can be run on extensive machine learning algorithms like Support Vector Machines, Random Forest, Recurrent Neural Networks and more.

    end